*** Start from a clean session: no data, no open log, no leftover programs or snapshots
clear all
capture log close
program drop _all
set more off
snapshot erase _all

*** Forward slashes are used in the path (as in the graph export at the end of this file)
*** because Stata accepts them on every platform; backslashes work on Windows only
log using "../Programs/Preparing Merchandise and Service Export Numbers.log", replace

*** Preparing Merchandise and Service Export Numbers.do
*** 8/29/2016

*************************************
*** Define a program to clean country names
*************************************
program define CLEAN_COUNTRY_NAMES
	*** Standardize the spelling of country names held in a string variable
	*** Usage:   CLEAN_COUNTRY_NAMES varname
	*** Effect:  varname is trimmed and then overwritten in place with the standardized
	***          names; every (new name, old name) pair that was changed is listed
	*** Rules run top to bottom and a later match overrides an earlier one,
	*** so the order of the replace statements below matters
	args cvar
	confirm string variable `cvar'

	replace `cvar' = trim(`cvar')
	gen `cvar'_temp = `cvar'
	quietly replace `cvar'_temp = "Yemen, Rep." if strpos(`cvar', "Yemen") > 0
	quietly replace `cvar'_temp = "Vietnam" if strpos(`cvar', "Viet") > 0
	quietly replace `cvar'_temp = "Macao SAR, China" if strpos(`cvar', "Macao") > 0
	quietly replace `cvar'_temp = "Hong Kong SAR, China" if strpos(`cvar', "Hong") > 0
	quietly replace `cvar'_temp = "China" if strpos(`cvar', "China") > 0 & strpos(`cvar', "Mainland") > 0
	quietly replace `cvar'_temp = "Lao PDR" if strpos(`cvar', "Lao") > 0
	*** Korea: names mentioning "Dem" or "North" go to the North; the rest (or "South") to Korea, Rep.
	quietly replace `cvar'_temp = "Korea, Dem. People’s Rep." if strpos(`cvar', "Korea") > 0 & (strpos(`cvar', "Dem") > 0 | strpos(`cvar', "North") > 0)
	quietly replace `cvar'_temp = "Korea, Rep." if strpos(`cvar', "Korea") > 0 & ((strpos(`cvar', "Dem") == 0  & strpos(`cvar', "North") == 0) | strpos(`cvar', "South") > 0)
	*** Repeated after the China rule so that Hong Kong always wins over it
	quietly replace `cvar'_temp = "Hong Kong SAR, China" if strpos(`cvar', "Hong Kong") > 0
	quietly replace `cvar'_temp = "Gambia, The" if strpos(`cvar', "Gambia") > 0
	quietly replace `cvar'_temp = "Congo, Rep." if strpos(`cvar', "Congo") > 0 & strpos(`cvar', "Dem") == 0
	quietly replace `cvar'_temp = "Congo, Dem. Rep." if strpos(`cvar', "Congo") > 0 & strpos(`cvar', "Dem") > 0
	quietly replace `cvar'_temp = "Congo, Dem. Rep." if strpos(`cvar', "Zaire") > 0
	quietly replace `cvar'_temp = "Bahamas, The" if strpos(`cvar', "Bahamas") > 0
	quietly replace `cvar'_temp = "Egypt, Arab Rep." if strpos(`cvar', "Egypt") > 0
	quietly replace `cvar'_temp = "Cote d'Ivoire" if strpos(`cvar', "Ivoire") > 0
	quietly replace `cvar'_temp = "Bolivia" if strpos(`cvar', "Bolivia") > 0
	quietly replace `cvar'_temp = "Tanzania" if strpos(`cvar', "Tanzania") > 0
	quietly replace `cvar'_temp = "Afghanistan" if strpos(`cvar', "Afghanistan") > 0
	*** Matched on the unaccented tail so that both "Principe" and "Príncipe" are caught
	quietly replace `cvar'_temp = "Sao Tome and Principe" if strpos(`cvar', "ncipe") > 0
	*** Iran and Venezuela each had a second, identical rule higher up that the rule
	*** here always overrode; only the deciding rule is kept
	quietly replace `cvar'_temp = "Iran, Islamic Rep." if strpos(`cvar', "Iran") > 0
	quietly replace `cvar'_temp = "St. Vincent and the Grenadines" if strpos(`cvar', "St. Vincent")> 0 & strpos(`cvar', "Grenadines")> 0
	quietly replace `cvar'_temp = "Venezuela, RB" if strpos(`cvar', "Venezuela") > 0
	quietly replace `cvar'_temp = "Central African Republic" if strpos(`cvar', "Central African") > 0
	quietly replace `cvar'_temp = "Kyrgyz Republic" if strpos(`cvar', "Kyrgyz") > 0
	quietly replace `cvar'_temp = "Macedonia, FYR" if strpos(`cvar', "Macedonia") > 0
	*** NOTE: a rule that mapped names containing "Falkand" to "Maldives" was removed here.
	*** The search text was misspelled, so it could not match a correctly spelled name,
	*** and the Falkland Islands are not the Maldives in any case
	quietly replace `cvar'_temp = "Antigua and Barbuda" if strpos(`cvar', "Antigua") > 0
	quietly replace `cvar'_temp = "Cabo Verde" if strpos(`cvar', "Verde") > 0
	quietly replace `cvar'_temp = "Comoros" if strpos(`cvar', "Comoro") > 0
	quietly replace `cvar'_temp = "Guinea-Bissau" if strpos(`cvar', "Guinea") > 0 & strpos(`cvar', "Bissau") > 0
	quietly replace `cvar'_temp = "Haiti" if `cvar' == "Haïti"
	quietly replace `cvar'_temp = "Myanmar" if strpos(`cvar', "Burma") > 0
	quietly replace `cvar'_temp = "Syrian Arab Republic" if strpos(`cvar', "Syria") > 0
	quietly replace `cvar'_temp = "Brunei Darussalam" if strpos(`cvar', "Brunei") > 0
	quietly replace `cvar'_temp = "St. Kitts and Nevis" if strpos(`cvar', "Kitts") >  0 & strpos(`cvar', "Nevis") > 0

	*** Show country names that we updated
	*** Skipped when nothing changed, so that contract is never handed an empty
	*** dataset (which would stop the program with a "no observations" error)
	quietly count if `cvar'_temp != `cvar'
	if r(N) > 0 {
		preserve
		keep if `cvar'_temp != `cvar'
		contract `cvar'_temp `cvar'
		list `cvar'_temp `cvar', ab(20) sep(100)
		restore
	}

	*** Overwrite the original variable with the cleaned names
	replace `cvar' = `cvar'_temp
	drop `cvar'_temp
end

*************************************************************
*************************************************************
*** Services Exports vs. Merchandise Exports
*************************************************************
*************************************************************

*************************************
*** Read in the data
*************************************
wbopendata, clear indicator(TX.VAL.SERV.CD.WT) nometadata

*** Keep individual countries only
drop if region == "Aggregates" | region == ""

*************************************
*** Prepare the data
*************************************

*** Reshape the data to one row per country and year
rename yr* serv_exp* 
reshape long serv_exp, i(countryname) j(year)

*** Keep only necessary years
keep if year >= 1990 & year <= 2014 

*************************************
*** Clean missing data 
*** 1. For cases where the nonmissing data points are <= 5 years apart, we will interpolate the missing values linearly
*** 2. For cases where the missing data points are at the beginning of the series, we will extend the earliest ratio of service exports to GDP backwards
*************************************

*** The downloaded series marks missing years with the ordinary missing value ".",
*** so gaps are identified below from the years themselves. (Earlier versions of both
*** steps tested for .a and .b, which nothing in this file ever sets, and so filled nothing.)

*** 1. For cases where the nonmissing data points are <= 5 years apart, we will interpolate the missing values linearly
*** NOTE(review): ipolate interpolates linearly in levels. If a constant growth rate
*** is wanted instead, interpolate ln(serv_exp) and exponentiate the result.

*** Nearest year with data at or before each observation (carried forward through gaps) ...
gen int last_obs_yr = year if serv_exp < .
bysort countryname (year): replace last_obs_yr = last_obs_yr[_n-1] if last_obs_yr >= .
*** ... and nearest year with data at or after it (same carry-forward, walking backwards in time)
gen int neg_year = -year
gen int next_obs_yr = year if serv_exp < .
bysort countryname (neg_year): replace next_obs_yr = next_obs_yr[_n-1] if next_obs_yr >= .
sort countryname year

*** ipolate fills only between two observed points; leading and trailing gaps stay missing.
*** A gap with no observed year on one side has a missing width and fails the <= 5 test.
ipolate serv_exp year, by(countryname) gen(serv_exp_temp)
replace serv_exp = serv_exp_temp if serv_exp >= . & next_obs_yr - last_obs_yr <= 5
drop serv_exp_temp last_obs_yr next_obs_yr neg_year

*** 2. For cases where the missing data points are at the beginning of the series, we will extend the earliest ratio of service exports to GDP backwards

*** Merge in GDP
preserve
wbopendata, clear indicator(NY.GDP.MKTP.CD) nometadata
drop if region == "Aggregates" | region == ""
rename yr* gdp*
reshape long gdp, i(countryname) j(year)
keep gdp year countryname 
*** Saved under the tempfile's own name, in quotes: Stata erases exactly that file when
*** the do-file ends, and the quotes keep a temp directory containing spaces from breaking the path
tempfile gdp 
save "`gdp'", replace
restore
merge 1:1 countryname year using "`gdp'", assert(2 3) keep(3) nogen norep

*** Mark the earliest year of data
gen has_data = year if serv_exp < .
bys countryname: egen first_data = min(has_data)
drop has_data
*** Extend the earliest ratio of exports to GDP backwards for up to 5 years
gen first_ratio_temp = serv_exp/gdp if year == first_data
bys countryname: egen first_ratio = max(first_ratio_temp)
drop first_ratio_temp
*** Countries with no data at all have a missing first_data and are left untouched
replace serv_exp = first_ratio*gdp if serv_exp >= . & year < first_data & year >= first_data-5
drop first_data first_ratio

*** Save a list of countries and years with nonmissing data
*** (the file keeps its ".dta" suffix because it is read back under that exact name further down)
preserve
keep if serv_exp < . & year >= 1990
keep countryname year
tempfile serv_exp_nm1990_2015
save "`serv_exp_nm1990_2015'.dta", replace
restore


*** Add up global total 
collapse (sum) serv_exp, by(year)

*** Save tempfile (again with the ".dta" suffix under which it is opened later)
tempfile serv_exports
save "`serv_exports'.dta", replace

*************************************************************
*************************************************************
*** GDP in international prices, 1990-2015 -- World Bank
*************************************************************
*************************************************************

*************************************
*** Read in the data
*************************************
*** NOTE(review): NY.GDP.MKTP.PP.CD is GDP at purchasing power parity, while the service
*** exports it is later divided by come from TX.VAL.SERV.CD.WT -- confirm that mixing
*** the two valuations is intended
wbopendata, clear indicator(NY.GDP.MKTP.PP.CD) nometadata

*** Keep only necessary values
drop if region == "Aggregates" | region == ""

*** Reshape the data
rename yr* gdp* 
reshape long gdp, i(countryname) j(year)

*** Keep only necessary years
keep if year >= 1990 & year <= 2014

*************************************
*** Remove GDP for countries/years that are missing data
***(because we are using GDP in the denominator only for countries with nonmissing export data)
*************************************

*** Remove GDP for countries/years that are missing service export data 
*** The using file lists the country-years that have service exports, so _merge == 3
*** marks exactly the rows whose GDP belongs in the denominator.
*** The file name is quoted so that a temp directory containing spaces does not break the path.
*** NOTE(review): a country-year with exports but missing GDP still counts in the
*** numerator built earlier; confirm whether it should be excluded there as well
merge 1:1 countryname year using "`serv_exp_nm1990_2015'.dta", assert(1 3)
gen serv_gdp = gdp if _merge == 3
drop _merge

*************************************
*** Add up global total
*************************************
*** After the collapse only year and serv_gdp remain
collapse (sum) serv_gdp, by(year)

*** Save tempfile (the ".dta" suffix is kept because the file is opened under that name below)
tempfile gdp1990_2014
save "`gdp1990_2014'.dta", replace

*************************************************************
*************************************************************
*** Combine all data 
*************************************************************
*************************************************************
*** File names built from tempfile macros are quoted so that a temp directory
*** containing spaces does not break the path
use "`serv_exports'.dta", clear

*************************************
*** Merge in GDP data
*************************************
merge 1:1 year using "`gdp1990_2014'.dta", assert(1 3 4) nogen update 
*** Every year of exports must have found a GDP total
assert  serv_gdp < .

*************************************
*** Calculate service exports as % GDP
*************************************
gen serv_exp_pct = serv_exp/serv_gdp * 100

*************************************
*** Merge in merchandise exports as % GDP
*************************************
merge 1:1 year using "Merchandise Exports - Percent of World GDP.dta", assert(2 3)
keep if year >= 1990 & year <= 2015

*************************************
*** Graph service exports vs. merchandise exports
*************************************
label variable serv_exp_pct "Service Exports (% GDP)"
label variable merch_exp_pct "Merchandise Exports (% GDP)"
label variable year "Year"
format merch_exp_pct serv_exp_pct %10.0fc
*** The scatter plots no points (year is never missing); it exists only to supply square
*** legend keys, which order(3 4) selects. The lines are given the same two colors
*** explicitly so that the keys match the lines whatever graph scheme is in use.
twoway (line merch_exp_pct serv_exp_pct year, lwidth(thick thick) lcolor("6 78 129"  "192 0 26")) ///
	(scatter merch_exp_pct serv_exp_pct year if year == ., msymbol(square square) mcolor("6 78 129"  "192 0 26")), ///
	ytitle("Exports (% GDP)") ytick(0(3)24, tlength(0)) ylabel(3(3)24, grid noticks) yscale(noline r(., 25) titlegap(2) ) ///
	xtitle("Year", color(gs6)) xlabel(1990(5)2015, angle(45) ) ///
	legend(cols(2) order(3 4) symysize(1) symxsize(1)) ///
	title("Merchandise and Service Exports, 1990-2014 ") plotregion(margin(zero)) 

graph export  "../Graphs/Service and Merchandise Exports.png", as(png) replace 

log close
